#######################################################################
##################### Metric Weather variables ########################
#######################################################################

rm(list=ls())
# working directory should be folder extr_weather_rep:
getwd()

# for specification curve analysis
source("code/spec_chart_function.R")

# libraries
library(rio)
library(lfe)

# get dataset with absolute values ####
resp <- import("data/respondents_local.csv")
# this dataset is not public

# add data on participation day
resp2 <- import("data/respondents_final.csv")
resp2 <- resp2[,c("PubId", "w1_part_day", "w4_part_day",
                  "w4_q15x3_rev", "w1_q16x4_rev",
                  "step3_index_w4", "step3_index_w1",
                  "w4_q5x9", "w1_q5x9",
                  "w4_q29", "w1_q25",
                  "w4_q30", "w1_q26",
                  "policy_pref_w4", "policy_pref_w1",
                  "pid_green_w4", "pid_green_w1")]
resp <- left_join(resp, resp2)
rm(resp2)

# this dataset contains:
# temp max local for each day in 2018-2019
# temp mean local for each day in 2018-2019
# precipitation local for each day in 2018-2019

# when did respondents participate in 2018 / 2019?

resp <- resp %>% mutate_if(is.character, as.numeric)

# set weather variables to NA depending on when respondent answered.
# max daily temp
for (day in 1:365) {
  for (i in 1:nrow(resp)){
    variable_name <- paste0("temp_2018_", day, "_loc")
    print(variable_name)
    if (day <= resp$w1_part_day[i]) { 
      resp[i, variable_name] <- NA 
    }
    variable_name2 <- paste0("temp_2019_", day, "_loc")
    if (day >= resp$w4_part_day[i]) {
      resp[i, variable_name2] <- NA   
    }
  }
}
#mean daily temp
for (day in 1:365) {
  for (i in 1:nrow(resp)){
    variable_name <- paste0("temp_2018_", day, "_loc_mean")
    print(variable_name)
    if (day <= resp$w1_part_day[i]) { 
      resp[i, variable_name] <- NA 
    }
    variable_name2 <- paste0("temp_2019_", day, "_loc_mean")
    if (day >= resp$w4_part_day[i]) {
      resp[i, variable_name2] <- NA   
    }
  }
}

#daily prec
for (day in 1:365) {
  for (i in 1:nrow(resp)){
    variable_name <- paste0("rain_2018_", day, "_loc")
    print(variable_name)
    if (day <= resp$w1_part_day[i]) { 
      resp[i, variable_name] <- NA 
    }
    variable_name2 <- paste0("rain_2019_", day, "_loc")
    if (day >= resp$w4_part_day[i]) {
      resp[i, variable_name2] <- NA   
    }
  }
}


# average of max/mean/prec for days between waves

# to calculate averages and sd of daily max temps
start <- which(names(resp) == "temp_2018_1_loc")
end <- which(names(resp) == "temp_2019_365_loc")
resp$temp_max_mean_w1w4 <- apply(resp[,start:end], 1, mean, na.rm = T)
resp$temp_max_median_w1w4 <- apply(resp[,start:end], 1, median, na.rm = T)
resp$temp_max_sd_w1w4 <- apply(resp[,start:end], 1, sd, na.rm = T)

# to calculate averages and sd of daily mean temps
start <- which(names(resp) == "temp_2018_1_loc_mean")
end <- which(names(resp) == "temp_2019_365_loc_mean")
resp$temp_mean_mean_w1w4 <- apply(resp[,start:end], 1, mean, na.rm = T)
resp$temp_mean_median_w1w4 <- apply(resp[,start:end], 1, median, na.rm = T)
resp$temp_mean_sd_w1w4 <- apply(resp[,start:end], 1, sd, na.rm = T)

# to calculate averages and sd of daily precipitation
start <- which(names(resp) == "rain_2018_1_loc")
end <- which(names(resp) == "rain_2019_365_loc")
resp$rain_mean_w1w4 <- apply(resp[,start:end], 1, mean, na.rm = T)
resp$rain_median_w1w4 <- apply(resp[,start:end], 1, median, na.rm = T)
resp$rain_sd_w1w4 <- apply(resp[,start:end], 1, sd, na.rm = T)

# keep variables for analysis
resp <- resp[, -grep("_loc|sd", names(resp))]

# exclude all respondents that are outside of Switzerland
resp <- filter(resp, PubId != 318)
resp <- filter(resp, PubId != 11222)
resp <- filter(resp, PubId != 23730)
resp <- filter(resp, PubId != 30713)

# absolute measures:  operationalisations
ggplot(resp, aes(temp_max_mean_w1w4, after_stat(scaled))) + geom_density(fill= "red", alpha=0.1) +
  geom_density(aes(temp_max_median_w1w4, after_stat(scaled)), fill= "red", alpha=0.1) +
  geom_density(aes(temp_mean_mean_w1w4, after_stat(scaled)), fill= "red", alpha=0.1) +
  geom_density(aes(temp_mean_median_w1w4, after_stat(scaled)), fill= "red", alpha=0.1) +
  geom_density(aes(rain_mean_w1w4, after_stat(scaled)), fill= "grey", alpha=0.4) +
  geom_density(aes(rain_median_w1w4, after_stat(scaled)), fill= "grey", alpha=0.4) + 
  scale_x_continuous(expand = c(0, 0)) + 
  scale_y_continuous(limits = c(0, 1.05), expand = c(0, 0)) +
  theme_bw() + labs(x = "mm/day (grey) or Degree Celcius (red)", 
                    y = "scaled density", 
  title = "Distribution of absolute measures")
ggsave("plots/distr_abs.png",
       width = 6, height = 3, dpi = 400)

# specification curve analysis ####

# variables
n_start <- which(names(resp) == "temp_max_mean_w1w4")
n_end <- which(names(resp) == "rain_median_w1w4")
treat <- names(resp[,c(n_start:n_end)])

#######################################
# One plot per step ####
#######################################

# Set-up ###
graph_height <- 3

#treat_labels <- str_remove(treat,"extreme_") %>% str_remove(.,"_percent") %>% str_remove(.,"_w1w4")
treat_labels <- c("Mean Temp (daily max)", "Median Temp (daily max)",
                  "Mean Temp (daily mean)", "Median Temp (daily mean)",
                  "Mean Prec", "Median Prec")
treat_labels_list <- list("Temperature:" = treat_labels[1:4],
                          "Precipitation:" = treat_labels[5:6])

## Step 2 - no covariates ####

# 0. Bring data into panel format
resp_step2 <- resp[,c("PubId", treat, "w4_q15x3_rev", "w1_q16x4_rev")]
resp_step2 <- resp_step2 %>% rename(., c(outcome_w4 = w4_q15x3_rev,
                                         outcome_w1 = w1_q16x4_rev))
# rename all weather variables to _w4
resp_step2 <- resp_step2 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step2$new_variable <- 0
  names(resp_step2)[names(resp_step2)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step2_long <- pivot_longer(resp_step2, 
                                cols = !PubId,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))

resp_step2_long$wave <- str_remove(resp_step2_long$wave, "_w")
resp_step2_long$PubId <- as.character(resp_step2_long$PubId)

# 1. Baseline regression

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(PubId) | 0 | PubId")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step2_long)
  # Export
  data.frame(coef=coef(reg)[[grep("_w1w4", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("_w1w4", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step2_ordered_90_metric.pdf", 
    width= 10, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       ylim = c(-0.75, 0.2),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step2\n(5-point scale)",        
       heights=c(.6,1),
       leftmargin = 15)
dev.off()

## Step 3 - no covariates ####

# 0. Bring data into panel format
resp_step3 <- resp[,c("PubId", treat, "step3_index_w4", "step3_index_w1")]
resp_step3 <- resp_step3 %>% rename(., c(outcome_w4 = step3_index_w4,
                                         outcome_w1 = step3_index_w1))

# rename all weather variables to _w4
resp_step3 <- resp_step3 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step3$new_variable <- 0
  names(resp_step3)[names(resp_step3)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step3_long <- pivot_longer(resp_step3, 
                                cols = !PubId,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step3_long$wave <- str_remove(resp_step3_long$wave, "_w")
resp_step3_long$PubId <- as.character(resp_step3_long$PubId)

# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(PubId) | 0 | PubId")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step3_long)
  # Export
  data.frame(coef=coef(reg)[[grep("w1w4", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("w1w4", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step3_ordered_90_metric.pdf", 
    width= 10, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on step3\n(5-point scale)",       
       heights=c(.6,1),
       leftmargin = 15,
       ylim = c(-0.8, 0.1))
dev.off()

## Step 4 MIP - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("PubId", treat, "w4_q5x9", "w1_q5x9")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = w4_q5x9,
                                         outcome_w1 = w1_q5x9))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !PubId,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$PubId <- as.character(resp_step4_long$PubId)

# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(PubId) | 0 | PubId")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("w1w4", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("w1w4", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_mip_ordered_90_metric.pdf", 
    width= 10, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.6,1),
       leftmargin = 15,
       ylim = c(-0.4,0.2))
dev.off()

## Step 4 cand_loc - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("PubId", treat, "w4_q29", "w1_q25")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = w4_q29,
                                         outcome_w1 = w1_q25))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !PubId,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$PubId <- as.character(resp_step4_long$PubId)

# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(PubId) | 0 | PubId")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("w1w4", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("w1w4", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_cand_loc_ordered_90_metric.pdf", 
    width= 10, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.6,1),
       leftmargin = 15,
       ylim = c(-0.2, +0.4))
dev.off()

## Step 4 cand_nat - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("PubId", treat, "w4_q30", "w1_q26")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = w4_q30,
                                         outcome_w1 = w1_q26))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !PubId,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$PubId <- as.character(resp_step4_long$PubId)


# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(PubId) | 0 | PubId")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("w1w4", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("w1w4", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_cand_nat_ordered_90_metric.pdf", 
    width= 10, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       col.est=c("grey80","royalblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.6,1),
       leftmargin = 15,
       ylim = c(-0.3, 0.1))
dev.off()

## Step 4 policy_pref - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("PubId", treat, "policy_pref_w4", "policy_pref_w1")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = policy_pref_w4,
                                         outcome_w1 = policy_pref_w1))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !PubId,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$PubId <- as.character(resp_step4_long$PubId)


# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(PubId) | 0 | PubId")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("w1w4", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("w1w4", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins
pdf("plots/spec_curves/step4_policy_pref_ordered_90_metric.pdf", 
    width= 10, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.6,1),
       leftmargin = 15,
       ylim = c(-0.4, 0.1))
dev.off()

## Step 4 pid_green - no covariates ####

# 0. Bring data into panel format
resp_step4 <- resp[,c("PubId", treat, "pid_green_w4", "pid_green_w1")]
resp_step4 <- resp_step4 %>% rename(., c(outcome_w4 = pid_green_w4,
                                         outcome_w1 = pid_green_w1))

# rename all weather variables to _w4
resp_step4 <- resp_step4 %>%
  rename_at(
    vars(treat), funs(str_c(treat, "_w4"))
  )

# create empty variables for pre-treatment value (_w1)
for (i in 1:length(treat)) {
  # create empty variable to fill
  resp_step4$new_variable <- 0
  names(resp_step4)[names(resp_step4)=="new_variable"] <- paste0(treat[i], "_w1", sep ="")
}

# long format for panel data analysis
resp_step4_long <- pivot_longer(resp_step4, 
                                cols = !PubId,
                                names_pattern = "(.*)(_w.$)",
                                names_to = c(".value","wave"))


resp_step4_long$wave <- str_remove(resp_step4_long$wave, "_w")
resp_step4_long$PubId <- as.character(resp_step4_long$PubId)


# 1. Baseline regression with all covariates

# dependent variable in combination with all treatment variables
flist <- lapply(treat, function(x) paste("outcome ~ ", paste(x, "| as.factor(wave) + as.factor(PubId) | 0 | PubId")))

# 2. Run models and export key info
regs1 <- lapply(flist, function(f) {
  print(f)
  # Run model
  reg <- felm(as.formula(f), resp_step4_long)
  # Export
  data.frame(coef=coef(reg)[[grep("w1w4", names(coef(reg)))]], 
             se=sqrt(diag(vcov(reg)))[[grep("w1w4", names(coef(reg)))]], 
             r2=summary(reg)$r.squared,
             # extract time fixed effects
             fe_w4 = getfe(reg) %>% .$effect %>% .[2])
})
regs1 <- data.frame(do.call("rbind",regs1))

regs1$l1 <-  regs1$coef - qnorm(0.975)*regs1$se
regs1$h1 <-  regs1$coef + qnorm(0.975)*regs1$se
# significant if both are negative or both are positive
regs1$sig <- 0 
regs1$sig[regs1$l1 < 0 & regs1$h1 < 0 |regs1$l1 > 0 & regs1$h1 > 0] <- 1
regs1$l1 <- NULL
regs1$h1 <- NULL

# 3. Prepare data for plotting
regs2 <- lapply(flist, function(t) str_detect(t, c("outcome", str_c(treat, "\\b"))))
regs2 <- data.frame(do.call("rbind",regs2))
names(regs2) <- c("outcome", treat)
data <- cbind(regs1,regs2)
rm(regs1, regs2)
data$outcome <- NULL # remove indicator for coefficient of interest present in all models
# remove R2 from table but keep it as a separate object
r2 <- data$r2
data$r2  <- NULL
sig <- data$sig
data$sig <- NULL
data$fe_w4 <- NULL

# 4. Labels
head(data)
# Enter labels in order they appear in table
labels <- treat_labels_list

# 5. Plots

# Looks better when there is an outer margins

pdf("plots/spec_curves/step4_pid_green_ordered_90_metric.pdf", 
    width= 10, height = graph_height)
par(oma=c(1,1,1,1))
schart(data, labels, 
       order="increasing", 
       highlight = which(sig==1),
       ci = c(.95, .90),
       col.est=c("grey60","royalblue"),
       col.est2=c("grey85","lightblue"),
       col.dot=c("grey60","grey95","grey95","royalblue"),
       ylab="effect on Step4\n(5-point scale)",        
       heights=c(.6,1),
       leftmargin = 15,
       ylim = c(-0.1, 0.5))
dev.off()